Overview

This is a second round of Felipe’s capture experiment. The last round had several problems. The mapping rate for many of the samples was low, which indicates poor quality of the library. The rRNA mapping rate for the samples was also very high, upwards of 50% or higher of the reads were soaked up by rRNA. The exonic mapping rate was also low, in the 50% range for most of the samples. There was also a very low number of genes detected for many samples, less than 5,000 for several.

This new round was with a different capture kit that should hopefully fix some of these problems.

> library(ggplot2)
> library(reshape)
> library(gplots)
> library(edgeR)
> library(CHBUtils)
> library(pheatmap)
> library(DESeq2)
> library(tximport)
> library(logging)
> basicConfig()
> project_summary = "/Users/rory/cache/felipe-rnaseq/new-round/2016-02-08_felipe-new-round/project-summary.csv"
> counts_file = "/Users/rory/cache/felipe-rnaseq/new-round/2016-02-08_felipe-new-round/combined.counts"
> tx2genes_file = "/Users/rory/cache/felipe-rnaseq/new-round/2016-02-08_felipe-new-round/tx2gene.csv"
> cbPalette <- c("#999999", "#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", 
+     "#D55E00", "#CC79A7")
> summarydata = data.frame(read.table(project_summary, header = TRUE, sep = ","), 
+     row.names = "Name", check.rows = FALSE)
> summarydata$Name = rownames(summarydata)
> summarydata = summarydata[order(summarydata$Name), ]
> if (file.exists(tx2genes_file)) {
+     loginfo("Using gene counts calculated from the Sailfish transcript counts.")
+     sf_files = file.path("..", "..", rownames(summarydata), "sailfish", rownames(summarydata), 
+         "quant.sf")
+     names(sf_files) = rownames(summarydata)
+     tx2gene = read.table(tx2genes_file, sep = ",", row.names = NULL, header = FALSE)
+     txi.salmon = tximport(sf_files, type = "salmon", tx2gene = tx2gene, reader = readr::read_tsv, 
+         countsFromAbundance = "lengthScaledTPM")
+     counts = round(data.frame(txi.salmon$counts, check.names = FALSE))
+ } else {
+     loginfo("Using gene counts calculated from featureCounts.")
+     counts = read.table(counts_file, header = TRUE, row.names = "id", check.names = FALSE)
+ }
2016-05-28 16:14:48 INFO::Using gene counts calculated from the Sailfish transcript counts.
> counts = counts[, order(colnames(counts)), drop = FALSE]
> colnames(counts) = gsub(".counts", "", colnames(counts))
> 
> # this is a list of all non user-supplied metadata columns that could appear
> known_columns = c("Name", "X.GC", "Exonic.Rate", "Sequences.flagged.as.poor.quality", 
+     "rRNA_rate", "Fragment.Length.Mean", "Intronic.Rate", "Intergenic.Rate", 
+     "Mapping.Rate", "Quality.format", "Duplication.Rate.of.Mapped", "Mapped", 
+     "rRNA", "Sequence.length", "Transcripts.Detected", "Mean.Per.Base.Cov.", 
+     "Genes.Detected", "Unique.Starts.Per.Read", "unique_starts_per_read", "complexity", 
+     "X5.3.bias", "Duplicates.pct", "Duplicates", "Mapped.reads", "Median.insert.size", 
+     "Mapped.reads.pct", "Total.reads", "avg_coverage_per_region", "Mapped.Reads")
> # add the metadata from the original samples
> ometadata = read.table("metadata/felipe-new-round.csv", header = TRUE, sep = ",")
> summarydata$fraction = NULL
> summarydata$group = NULL
> summarydata$id = NULL
> summarydata$phenotype = NULL
> summarydata$treatment = NULL
> library(dplyr)
> summarydata = summarydata %>% left_join(ometadata, by = c(Name = "description"))
> rownames(summarydata) = summarydata$Name
> metadata = summarydata[, !colnames(summarydata) %in% known_columns, drop = FALSE]
> metadata = metadata[, c("treatment", "group", "phenotype")]
> sanitize_datatable = function(df, ...) {
+     # remove dashes which cause wrapping
+     DT::datatable(df, ..., rownames = gsub("-", "_", rownames(df)), colnames = gsub("-", 
+         "_", colnames(df)))
+ }
> # set seed for reproducibility
> set.seed(1454944673)

Sample metadata

> get_heatmap_fn = function(summarydata) {
+     # return the pheatmap function with or without metadata
+     if (ncol(metadata) == 0) {
+         return(pheatmap)
+     } else {
+         # rownames(metadata) = summarydata$Name
+         heatmap_fn = function(data, ...) {
+             pheatmap(data, annotation = metadata, ...)
+         }
+         return(heatmap_fn)
+     }
+ }
> heatmap_fn = get_heatmap_fn(summarydata)

Quality control metrics

> qualimap_run = "Mapped" %in% colnames(summarydata)

Mapped reads

> ggplot(summarydata, aes(x = Name, y = Mapped)) + theme_bw(base_size = 10) + 
+     theme(panel.grid.major = element_line(size = 0.5, color = "grey"), axis.text.x = element_text(angle = 90)) + 
+     geom_bar(stat = "identity") + ylab("mapped reads") + xlab("")

There are about 5-10 million mapped reads per sample, this is on the low side for doing a RNA-seq experiment, we’d usually be aiming for more like 20-30 million.

> ggplot(summarydata, aes(x = Name, y = Mapped.reads)) + theme_bw(base_size = 10) + 
+     theme(panel.grid.major = element_line(size = 0.5, color = "grey"), axis.text.x = element_text(angle = 90)) + 
+     geom_bar(stat = "identity") + ylab("mapped reads") + xlab("")

Genomic mapping rate

> ggplot(summarydata, aes(x = Name, y = Mapping.Rate)) + geom_bar(stat = "identity") + 
+     ylab("mapping rate") + xlab("") + theme_bw(base_size = 10) + theme(panel.grid.major = element_line(size = 0.5, 
+     color = "grey"), axis.text.x = element_text(angle = 90))

We can see some problems with these samples as well with the genomic mapping rate. HC-2, MS-6, MS-3 and MS-4 all have low mapping rates so something is wrong with these libraries.

> ggplot(summarydata, aes(x = Name, y = Mapped.reads.pct)) + geom_bar(stat = "identity") + 
+     ylab("mapping rate") + xlab("") + theme_bw(base_size = 10) + theme(panel.grid.major = element_line(size = 0.5, 
+     color = "grey"), axis.text.x = element_text(angle = 90))

Number of genes detected

> dd = data.frame(Name = colnames(counts), Genes.Detected = colSums(counts > 0))
> ggplot(dd, aes(x = Name, y = Genes.Detected)) + geom_bar(stat = "identity") + 
+     theme_bw(base_size = 10) + theme(panel.grid.major = element_line(size = 0.5, 
+     color = "grey"), axis.text.x = element_text(angle = 90)) + ylab("genes detected") + 
+     xlab("")

The number of genes detected plots look much much better than the plots from the last samples. The last samples had more like 7,500 genes detected. This means these libraries are much more complex than the previous libraries, which is very good news for running an analysis. There is a huge outlier, MS-5 has a huge number of genes detected compared to the other samples.

Exonic mapping rate

> ggplot(summarydata, aes(x = Name, y = Exonic.Rate)) + geom_bar(stat = "identity") + 
+     theme_bw(base_size = 10) + theme(panel.grid.major = element_line(size = 0.5, 
+     color = "grey"), axis.text.x = element_text(angle = 90)) + ylab("exonic mapping rate") + 
+     xlab("")

The exonic mapping rate for some of the samples is very low. MS-5, MS-6, MS-3 and MS-4 all have low mapping rates compared to the other samples.

rRNA mapping rate

> eval_rRNA = "rRNA_rate" %in% colnames(summarydata) & !sum(is.na(summarydata$rRNA_rate)) == 
+     nrow(summarydata)
> ggplot(summarydata, aes(x = Name, y = rRNA_rate)) + geom_bar(stat = "identity") + 
+     theme_bw(base_size = 10) + theme(panel.grid.major = element_line(size = 0.5, 
+     color = "grey"), axis.text.x = element_text(angle = 90)) + ylab("rRNA rate") + 
+     xlab("")

The rRNA mapping rate for the samples is in general very high. This is not as high as before where the average looked to be around 40%, but there are still a lot of reads in the rRNA genes.

Estimated fragment length of paired-end reads

> ggplot(summarydata, aes(x = Name, y = Fragment.Length.Mean)) + geom_bar(stat = "identity") + 
+     theme_bw(base_size = 10) + theme(panel.grid.major = element_line(size = 0.5, 
+     color = "grey"), axis.text.x = element_text(angle = 90)) + ylab("fragment length") + 
+     xlab("")

These fragments are very small, this indicates that there was likely some degradation in the libraries. There was no reason to run these libraries paired end because the fragment is small enough that a single read would cover it. This fragment length is smaller than before, the last run the length was closer to 100.

Boxplot of log10 counts per gene

> melted = melt(counts)
> colnames(melted) = c("sample", "count")
> melted$sample = factor(melted$sample)
> melted = melted[order(melted$sample), ]
> melted$count = log(melted$count)
> ggplot(melted, aes(x = sample, y = count)) + geom_boxplot() + theme_bw(base_size = 10) + 
+     theme(panel.grid.major = element_line(size = 0.5, color = "grey"), axis.text.x = element_text(angle = 90)) + 
+     xlab("")

The log counts per gene is more uniform than the last round of sequencing, which is good.

Boxplot of log10 TMM-normalized counts per gene

Trimmed mean of M-values (TMM) normalization is described here

Robinson, M. D., & Oshlack, A. (2010). A scaling normalization method for differential expression analysis of RNA-seq data. Genome Biology, 11(3). doi:10.1186/gb-2010-11-3-r25

> y = DGEList(counts = counts)
> y = calcNormFactors(y)
> normalized_counts = cpm(y, normalized.lib.sizes = TRUE)
> melted = melt(normalized_counts)
> colnames(melted) = c("gene", "sample", "count")
> melted$sample = factor(melted$sample)
> melted = melted[order(melted$sample), ]
> melted$count = log(melted$count)
> ggplot(melted, aes(x = sample, y = count)) + geom_boxplot() + theme_bw(base_size = 10) + 
+     theme(panel.grid.major = element_line(size = 0.5, color = "grey"), axis.text.x = element_text(angle = 90)) + 
+     xlab("")

TMM normalizing does a reasonable job normalizing the counts.

The samples don’t seem to cluster together by any of the metadata.

PCA plot

> dds = DESeqDataSetFromMatrix(countData = counts, colData = summarydata, design = ~Name)
> vst = varianceStabilizingTransformation(dds)
> plotPCA(vst, intgroup = c("treatment"))

> plotPCA(vst, intgroup = c("phenotype"))

> plotPCA(vst, intgroup = c("group"))

> plotPCA(vst, intgroup = c("rRNA_rate"))

> plotPCA(vst, intgroup = c("Mapped"))

> plotPCA(vst, intgroup = c("group", "treatment"))

> plotPCA(vst, intgroup = c("group", "phenotype"))

So similar to before, the libraries don’t seem to be clustering together properly at all. They don’t seem to cluster by rRNA rate or number of reads mapped either.

Felipe, could you double check that the samples and their metadata matched up properly? Could something have gotten swapped along the way?

> sanitize_datatable(metadata)

PCA uses the most variable genes to cluster the cells. What are the genes that are most variable between the samples? Maybe a clue about what is going on lies in their identities.

> library(biomaRt)
> human = useMart(biomart = "ENSEMBL_MART_ENSEMBL", dataset = "hsapiens_gene_ensembl", 
+     host = "jul2015.archive.ensembl.org")
> conversions = getBM(attributes = c("ensembl_gene_id", "hgnc_symbol", "gene_biotype", 
+     "chromosome_name"), mart = human)
> rrna_biotypes = c("rRNA", "Mt_rRNA", "misc_RNA", "snRNA", "snoRNA", "tRNA", 
+     "Mt_tRNA")
> rrna_genes = unique(subset(conversions, gene_biotype %in% rrna_biotypes)$ensembl_gene_id)
> library(matrixStats)
> vst_values = assay(vst)
> rv = data.frame(gene = rownames(vst_values), rv = rowVars(as.matrix(vst_values)))
> mostvar = rv[order(rv$rv, decreasing = TRUE), ] %>% left_join(conversions, by = c(gene = "ensembl_gene_id"))

I thought maybe these genes would be dominated by rRNA, but the top 500 most variant genes are mostly protein coding:

> knitr::kable(as.data.frame(table(head(mostvar, 500)$gene_biotype)))
Var1 Freq
antisense 21
lincRNA 21
misc_RNA 2
processed_pseudogene 12
processed_transcript 4
protein_coding 395
sense_intronic 1
sense_overlapping 1
snoRNA 1
snRNA 6
TR_C_gene 1
transcribed_processed_pseudogene 1
transcribed_unprocessed_pseudogene 6
unprocessed_pseudogene 1
> knitr::kable(as.data.frame(table(head(mostvar, 500)$chromosome_name)))
Var1 Freq
1 48
10 13
11 24
12 36
13 7
14 19
15 18
16 15
17 20
18 9
19 21
2 32
20 6
21 10
22 8
3 24
4 28
5 16
6 33
7 21
8 26
9 19
X 14
Y 6
> write.table(head(mostvar, 500)$gene, quote = FALSE, row.names = FALSE, col.names = FALSE, 
+     file = "500mostvar.txt")
> write.table(head(mostvar, 500)$hgnc_symbol, quote = FALSE, row.names = FALSE, 
+     col.names = FALSE, file = "500mostvar-symbol.txt")
> write.table(rownames(counts[rowMeans(counts) > 10, ]), quote = FALSE, row.names = FALSE, 
+     col.names = FALSE, file = "expressed.txt")

XIST tops the most variable list, are some of these samples females and the other samples males?

> sanitize_datatable(head(mostvar, 500))

XIST expression is highly variable between the samples, high in some and almost zero in the others:

> knitr::kable(counts["ENSG00000229807", ])
HC-1-L1 HC-1-L2 HC-2-L1 HC-2-L2 HC-3-L1 HC-3-L2 HC-4-L1 HC-4-L2 HC-5-L1 HC-5-L2 HC-6-L1 HC-6-L2 HC-7-L1 HC-7-L2 HC-8-L1 HC-8-L2 HC-9-L1 HC-9-L2 MS-1-L1 MS-1-L2 MS-2-L1 MS-2-L2 MS-3-L1 MS-3-L2 MS-4-L1 MS-4-L2 MS-5-L1 MS-5-L2 MS-6-L1 MS-6-L2 MS-7-L1 MS-7-L2 MS-8-L1 MS-8-L2 MS-9-L1 MS-9-L2
ENSG00000229807 1 1 97 83 13 2 14466 15677 3479 3985 6691 5791 1999 1908 3006 3073 22585 22526 1822 2082 1583 1544 17 70 1765 2391 95 43 0 0 1 1 0 0 0 0

XIST expression doesn’t explain the groupings though:

> vst$xist = vst_values["ENSG00000229807", ]
> plotPCA(vst, intgroup = c("xist"))

What about an ontology analysis of those top variable genes?

GO ontology of top 500 most variable genes

There is something different about these samples that are causing them to group.

I plugged the top 500 most variable genes into EnrichR and looked to see what cell types they might be markers for:

> enrichr_types = read.table("web/cell-types-top500var.txt", header = TRUE, sep = "\t")
> sanitize_datatable(enrichr_types)

Here is a heatmap of the counts for the genes associated with CD33+ myeloid cells:

> cd33_genes = unlist(strsplit(as.character(enrichr_types[1, "Genes"]), ";"))
> cd33_ids = subset(conversions, hgnc_symbol %in% cd33_genes)$ensembl_gene_id
> cd33_counts = subset(conversions, hgnc_symbol %in% cd33_genes)$ensembl_gene_id
> cd33_counts = log(counts[rownames(counts) %in% cd33_ids, ] + 1)
> rownames(cd33_counts) = conversions[match(rownames(cd33_counts), conversions$ensembl_gene_id), 
+     "hgnc_symbol"]
> heatmap_fn(cd33_counts, fontsize = 6)

Here is a heatmap of the counts for the genes in the CD14+ monocytes:

> library(pheatmap)
> cd14_genes = unlist(strsplit(as.character(enrichr_types[2, "Genes"]), ";"))
> cd14_ids = subset(conversions, hgnc_symbol %in% cd14_genes)$ensembl_gene_id
> cd14_counts = subset(conversions, hgnc_symbol %in% cd14_genes)$ensembl_gene_id
> cd14_counts = log(counts[rownames(counts) %in% cd14_ids, ] + 1)
> rownames(cd14_counts) = conversions[match(rownames(cd14_counts), conversions$ensembl_gene_id), 
+     "hgnc_symbol"]
> heatmap_fn(cd14_counts, fontsize = 6)

Could the sorting of the cells have grabbed different populations?

We can also throw up our hands and say we don’t know what the hidden factor is between the samples that is causing the variation and try to remove it but it is better to try to track down what could be going on

Below here is a placeholder to do the analysis what we figure out what is going on with the samples.

> # snagged from development version of DESeq
> DESeqDataSetFromTximport <- function(txi, colData, design, ...) {
+     counts <- round(txi$counts)
+     mode(counts) <- "integer"
+     dds <- DESeqDataSetFromMatrix(countData = counts, colData = colData, design = design, 
+         ...)
+     stopifnot(txi$countsFromAbundance %in% c("no", "scaledTPM", "lengthScaledTPM"))
+     if (txi$countsFromAbundance %in% c("scaledTPM", "lengthScaledTPM")) {
+         message("using length scaled TPM counts from tximport")
+     } else {
+         message("using counts and average transcript lengths from tximport")
+         lengths <- txi$length
+         dimnames(lengths) <- dimnames(dds)
+         assays(dds)[["avgTxLength"]] <- lengths
+     }
+     return(dds)
+ }
> 
> subset_tximport = function(txi, rows, columns) {
+     txi$counts = txi$counts[rows, columns]
+     txi$abundance = txi$abundance[rows, columns]
+     txi$length = txi$length[rows, columns]
+     return(txi)
+ }

Differential expression

Before we do differential expression we need to combine the replicate samples.

> summarydata$samplename = summarydata$sample
> combined = counts %>% add_rownames() %>% tidyr::gather(sample, count, -rowname) %>% 
+     left_join(summarydata, by = c(sample = "Name")) %>% dplyr::select(rowname, 
+     samplename, count) %>% group_by(rowname, samplename) %>% summarise(total = sum(count)) %>% 
+     tidyr::spread(samplename, total) %>% as.data.frame()
> rownames(combined) = combined$rowname
> combined$rowname = NULL
> metasum = unique(summarydata[, c("samplename", "fraction", "treatment", "group", 
+     "phenotype", "sample")])
> counts = combined
> summarydata = metasum
> rownames(summarydata) = summarydata$samplename
> library(DEGreport)
> library(vsn)
> # page 37 from DESeq2 manual
> # https://www.bioconductor.org/packages/3.3/bioc/vignettes/DESeq2/inst/doc/DESeq2.pdf
> summarydata$treatment = factor(summarydata$treatment, levels = c("natural", 
+     "LPS", "LPS+IL27"))
> summarydata$phenotype = factor(summarydata$phenotype, levels = c("noMS", "MS"))
> design = ~group + phenotype + treatment + phenotype:treatment

Above we set up to do the analysis by converting the treatment and the phenotype variables to factors and setting the “natural” treatment as the base level for the treatment and “nonMS” as the base level for the phenotype. Then we set up a design that

  1. controls for the group-wise effects
  2. looks at the effect of having MS vs not having MS on the natural cells
  3. looks at the effect of LPS and LPS+IL27 treatment on the nonMS cells
  4. looks at the effect of LPS and LPS+IL27 treatment on the MS cells

This sets us up to do differential expression.

> counts <- counts[rowSums(counts > 0) > 1, ]
> dds = DESeqDataSetFromMatrix(countData = counts, colData = summarydata, design = design)
> dds = estimateSizeFactors(dds)
> dds = DESeq(dds)
> febdds = dds
> save(febdds, file = "february-dds.RData")

This is the effect of MS vs non-MS when there is no treatment (natural):

> mseffect = results(dds, name = "phenotype_MS_vs_noMS")
> msaffected_genes = rownames(subset(mseffect, padj < 0.05))
> write.table(rownames(subset(mseffect, padj < 0.05)), file = "ms-effect.txt", 
+     row.names = FALSE, col.names = FALSE, quote = FALSE)
> write.table(rownames(mseffect), file = "expressed.txt", row.names = FALSE, col.names = FALSE, 
+     quote = FALSE)
> plotMA(mseffect)
> title("MS effect")

> stats = as.data.frame(mseffect[, c(2, 6)])
> volcano_density_plot(stats, title = "MS effect", lfc.cutoff = 1.5)

And a GO/KEGG enrichment of those genes, using the set of genes we detected in the cells as a background set and limiting the considered terms to ones which had at least 5 genes.

GO KEGG

This is the specific effect of LPS treatment on the MS samples. This tests if the effect of LPS treatment is different on the MS cells compared to the LPS treatment of the non-MS cells.

> lpsmseffect = results(dds, name = "phenotypeMS.treatmentLPS")
> lpsmsaffected_genes = rownames(subset(lpsmseffect, padj < 0.05))
> write.table(rownames(subset(lpsmseffect, padj < 0.05)), file = "lpsms-ms-effect.txt", 
+     row.names = FALSE, col.names = FALSE, quote = FALSE)
> plotMA(lpsmseffect)
> title("LPS effect on MS cells")

> stats = as.data.frame(lpsmseffect[, c(2, 6)])
> volcano_density_plot(stats, title = "LPS effect on MS cells", lfc.cutoff = 1.5)

And the GO/KEGG enrichment:

GO KEGG

This is the specific effect of LPS+IL27 treatment on the MS samples. This tests if the effect of LPS+IL27 treatment on the MS cells is different than the LPS+IL27 treatment on the non-MS cells.

> il27lpsmseffect = results(dds, name = "phenotypeMS.treatmentLPS.IL27")
> write.table(rownames(subset(il27lpsmseffect, padj < 0.05)), file = "il27lpsms-effect-ms.txt", 
+     row.names = FALSE, col.names = FALSE, quote = FALSE)
> plotMA(il27lpsmseffect)
> title("IL27+LPS effect on MS cells")

> stats = as.data.frame(il27lpsmseffect[, c(2, 6)])
> volcano_density_plot(stats, title = "IL27+LPS effect on MS cells", lfc.cutoff = 1.5)

And the GO/KEGG enrichment:

GO KEGG

Comparisons Felipe asked for

Over email Felipe asked for some comparisons similar to what we looked at for the April samples:

MS: ( LPS vs. LPS+Il27);
HC: (LPS vs. LPS+IL27);
MS LPS vs. HS LPS;
MS LPS+IL27; HC LPS+IL27
And here, in addition, not plated (ex-vivo) MS vs. not plated HC

We’ll re-fit a model here after we add a column of phenotype + treatment, and look for these differences.

> summarydata$phenotreat = paste(summarydata$phenotype, summarydata$treatment, 
+     sep = "_")
> design = ~group + phenotreat
> dds = DESeqDataSetFromMatrix(counts, colData = summarydata, design = design)
> dds = DESeq(dds)
> symbols = conversions[, c("ensembl_gene_id", "hgnc_symbol")]
> colnames(symbols) = c("id", "symbol")

MS LPS+IL27 vs LPS

This compares the LPS+IL27 treatment to the LPS treatment in MS cells. Higher fold changes mean the gene is higher in the LPS+IL27 treated samples.

> ms_il27_lps = results(dds, contrast = c("phenotreat", "MS_LPS.IL27", "MS_LPS"))
> stats = as.data.frame(ms_il27_lps[, c(2, 6)])
> volcano_density_plot(stats, title = "MS LPS+IL27 vs LPS", lfc.cutoff = 1.5)

> ms_il27_lps_sig = subset(ms_il27_lps, padj < 0.1)
> ms_il27_lps = cbind(data.frame(id = as.character(rownames(ms_il27_lps))), ms_il27_lps)
> ms_il27_lps = data.frame(ms_il27_lps) %>% left_join(symbols, by = "id")
> write.table(ms_il27_lps, file = "ms-il27-vs-lps.txt", row.names = FALSE, col.names = TRUE, 
+     quote = FALSE)

There are genes significantly different using an adjusted p-value cutoff of 0.1 between the MS LPS+IL27 treated and the MS LPS cells.

HC LPS+IL27 vs LPS

This compares the LPS+IL27 treatment to the LPS treatment in control cells. Higher fold changes mean the gene is higher in the LPS+IL27 treated samples.

> hc_il27_lps = results(dds, contrast = c("phenotreat", "noMS_LPS.IL27", "noMS_LPS"))
> write.table(hc_il27_lps, file = "hc-il27-vs-lps.txt", row.names = FALSE, col.names = FALSE, 
+     quote = FALSE)
> stats = as.data.frame(hc_il27_lps[, c(2, 6)])
> volcano_density_plot(stats, title = "HC LPS+IL27 vs LPS", lfc.cutoff = 1.5)

> hc_il27_lps_sig = subset(hc_il27_lps, padj < 0.1)
> hc_il27_lps = cbind(data.frame(id = rownames(hc_il27_lps)), hc_il27_lps)
> hc_il27_lps = data.frame(hc_il27_lps) %>% left_join(symbols, by = "id")
> write.table(hc_il27_lps, file = "hc-il27-vs-lps.txt", row.names = FALSE, col.names = TRUE, 
+     quote = FALSE)

There are 0 genes significantly different using an adjusted p-value cutoff of 0.1 between the HC LPS+IL27 treated and the HC LPS cells.

MS LPS vs HC LPS

This compares the LPS treated MS samples to the LPS treated HC samples. Higher fold changes mean the gene is higher in the LPS MS samples.

> ms_lps_hc_lps = results(dds, contrast = c("phenotreat", "MS_LPS", "noMS_LPS"))
> stats = as.data.frame(ms_lps_hc_lps[, c(2, 6)])
> volcano_density_plot(stats, title = "MS LPS vs HC LPS", lfc.cutoff = 1.5)

> ms_lps_hc_lps_sig = subset(ms_lps_hc_lps, padj < 0.1)
> ms_lps_hc_lps = cbind(data.frame(id = rownames(ms_lps_hc_lps)), ms_lps_hc_lps)
> ms_lps_hc_lps = data.frame(ms_lps_hc_lps) %>% left_join(symbols, by = "id")
> write.table(ms_lps_hc_lps, file = "ms-lps-vs-hc-lps.txt", row.names = FALSE, 
+     col.names = TRUE, quote = FALSE)

MS LPS+IL27 vs HC LPS+IL27

This compares the LPS+IL27 treated MS samples to the LPS+IL27 treated HC samples. Higher fold changes mean the gene is higher in the LPS+IL27 MS samples.

> ms_il27_hc_il27 = results(dds, contrast = c("phenotreat", "MS_LPS+IL27", "noMS_LPS+IL27"))
> stats = as.data.frame(ms_il27_hc_il27[, c(2, 6)])
> volcano_density_plot(stats, title = "MS LPS+IL27 vs HC LPS+IL27", lfc.cutoff = 1.5)

> ms_il27_hc_il27_sig = subset(ms_il27_hc_il27, padj < 0.1)
> ms_il27_hc_il27 = cbind(data.frame(id = rownames(ms_il27_hc_il27)), ms_il27_hc_il27)
> ms_il27_hc_il27 = data.frame(ms_il27_hc_il27) %>% left_join(symbols, by = "id")
> write.table(ms_il27_hc_il27, file = "ms-il27-vs-hc-il27.txt", row.names = FALSE, 
+     col.names = TRUE, quote = FALSE)

There are 0 genes significantly different using an adjusted p-value cutoff of 0.1 between the MS LPS+IL27 treated and the HC LPS+IL27 treated cells.

MS untreated vs HC untreated

This compares the MS untreated samples to the HC untreated samples. Higher fold changes mean the gene is higher in the MS untreated samples than the HC untreated samples.

> ms_natural_hc_natural = results(dds, contrast = c("phenotreat", "MS_natural", 
+     "noMS_natural"))
> stats = as.data.frame(ms_natural_hc_natural[, c(2, 6)])
> volcano_density_plot(stats, title = "MS natural vs HC natural", lfc.cutoff = 1.5)

> ms_natural_hc_natural_sig = subset(ms_natural_hc_natural, padj < 0.1)
> ms_natural_hc_natural = cbind(data.frame(id = rownames(ms_natural_hc_natural)), 
+     ms_natural_hc_natural)
> ms_natural_hc_natural = data.frame(ms_natural_hc_natural) %>% left_join(symbols, 
+     by = "id")
> write.table(ms_natural_hc_natural, file = "ms-natural-vs-hc-natural.txt", row.names = FALSE, 
+     col.names = TRUE, quote = FALSE)

There are 0 genes significantly different using an adjusted p-value cutoff of 0.1 between the untreated MS samples and the untreated HC samples.